options(markdown.HTML.header = unlist(sapply(system.file("misc", c("vignette.css",
"datatables.txt"), package = "knitr"), readLines)))
summary(BCELL)
## Sample Center File Population
## 12828:504 Baylor :216 1228-1_C1_C01.fcs : 24 Lymphocytes:189
## 1349 :504 CIMR :216 1228-2_C2_C02.fcs : 24 CD19 :189
## 1369 :504 Miami :216 1228-3_C3_C03.fcs : 24 CD20 :189
## NHLBI :216 12828_1_B CELL.fcs : 24 Naive :189
## Stanford:216 12828_1_Bcell_C01.fcs: 24 Memory IgD+:189
## UCLA :216 12828_2_B CELL.fcs : 24 Memory IgD-:189
## Yale :216 (Other) :1368 (Other) :378
## Proportion Method Replicate lp
## Min. :0.0069 Manual :504 Min. :1 Min. :-4.966
## 1st Qu.:0.0836 flowDensity:504 1st Qu.:1 1st Qu.:-2.394
## Median :0.1600 OpenCyto :504 Median :2 Median :-1.658
## Mean :0.2889 Mean :2 Mean :-0.902
## 3rd Qu.:0.4754 3rd Qu.:3 3rd Qu.:-0.099
## Max. :1.0000 Max. :3 Max. :11.513
##
## logp
## Min. :-4.975
## 1st Qu.:-2.482
## Median :-1.833
## Mean :-1.736
## 3rd Qu.:-0.744
## Max. : 0.000
##
NAs in the dataunique(BCELL[is.na(Proportion), list(Center, File, Method)])
## Empty data.table (0 rows) of 3 cols: Center,File,Method
## NULL
unique(BCELL[Proportion > 1, list(Center, Population, Method)])
## Empty data.table (0 rows) of 3 cols: Center,Population,Method
## NULL
NAs come from Yale, and the file is not defined. This seems to be some missing data.| Lymphocytes | CD19 | CD20 | Naive | Memory IgD+ | Memory IgD- | Transitional | Plasmablasts | |
|---|---|---|---|---|---|---|---|---|
| Manual | 63 | 63 | 63 | 63 | 63 | 63 | 63 | 63 |
| flowDensity | 63 | 63 | 63 | 63 | 63 | 63 | 63 | 63 |
| OpenCyto | 63 | 63 | 63 | 63 | 63 | 63 | 63 | 63 |
range(m$value)=[0.0069, 1]
BCELL[, `:=`(Replicate, gl(nrow(.SD), 1)), list(Sample, Center, Population,
Method)]
## Sample Center File Population Proportion
## 1: 12828 Baylor 12828_1_B CELL.fcs Lymphocytes 0.4940
## 2: 12828 Baylor 12828_2_B CELL.fcs Lymphocytes 0.4840
## 3: 12828 Baylor 12828_3_B CELL.fcs Lymphocytes 0.4840
## 4: 12828 CIMR B_CELL_12828_P1.fcs Lymphocytes 0.4390
## 5: 12828 CIMR B_CELL_12828_001_P1.fcs Lymphocytes 0.4440
## ---
## 1508: 1349 Miami lot 1349_C5_C05.fcs Plasmablasts 0.2453
## 1509: 1349 Miami lot 1349_C6_C06.fcs Plasmablasts 0.4194
## 1510: 1369 Miami lot 1369_C7_C07.fcs Plasmablasts 0.4400
## 1511: 1369 Miami lot 1369_C8_C08.fcs Plasmablasts 0.4667
## 1512: 1369 Miami lot 1369_C9_C09.fcs Plasmablasts 0.5000
## Method Replicate lp logp
## 1: Manual 1 -0.02400 -0.7052
## 2: Manual 2 -0.06402 -0.7257
## 3: Manual 3 -0.06402 -0.7257
## 4: Manual 1 -0.24522 -0.8233
## 5: Manual 2 -0.22494 -0.8119
## ---
## 1508: OpenCyto 2 -1.12390 -1.4053
## 1509: OpenCyto 3 -0.32542 -0.8690
## 1510: OpenCyto 1 -0.24116 -0.8210
## 1511: OpenCyto 2 -0.13353 -0.7621
## 1512: OpenCyto 3 0.00000 -0.6931
We want to model variability between centers, between subjects, and contrast gating methods for each cell population.
df <- cast(BCELL, Sample + Center + Method ~ Population + Replicate, value = "Proportion")
BCELL <- BCELL[, `:=`(lp, logit(Proportion, adjust = 1e-05))]
BCELL <- BCELL[, `:=`(logp, log(Proportion))]
pops <- levels(BCELL$Population)
setkey(BCELL, Population)
ggplot(BCELL[pops[c(3, 5, 8)]]) + geom_boxplot(aes(y = Proportion, x = Center,
fill = Method)) + facet_grid(Population ~ Sample, scales = "free") + theme(axis.text.x = element_text(angle = 45,
hjust = 1)) + ggtitle("Raw B-cell data")
How we'll model this is the following. We'll have fixed effects for gating methods, cell populations and their interactions. That is becausewe want to esimate the effec of each gating method on each population.
We fit a random intercept for Sample and Center as well as for each level of Population:Center and Population:Sample. The idea here is that cell population estimates will vary from center to center and from sample to sample, by more than just a fixed offset.
We fit the reponse (proportions) on the logit scale.
# Estimate fixed effects for population and method and their interaction
# Random effects for center and sample, as random intercept for each
# population:center and population:Sample
mer <- lmer(lp ~ Population * Method + (1 | Center/Population) + (1 | Sample/Population),
BCELL[Population != "Lymphocytes"], REML = FALSE, verbose = FALSE)
We note several things:
## Sample Center File
## 1349 :756 Baylor :378 1228-1_A1_A01.fcs : 42
## 1369 :756 CIMR : 0 1228-2_A2_A02.fcs : 42
## 12828:756 Miami :378 1228-3_A3_A03.fcs : 42
## NHLBI :378 12828_1_A1_A01.fcs : 42
## Stanford:378 12828_1_T CELL.fcs : 42
## UCLA :378 12828_1_Tcell_A01.fcs: 42
## Yale :378 (Other) :2016
## Population Proportion Method Replicate
## Lymphocytes : 162 Min. :0.0004 Manual :756 Min. :1
## CD3 : 162 1st Qu.:0.0579 flowDensity:756 1st Qu.:1
## CD4 : 162 Median :0.3061 OpenCyto :756 Median :2
## CD4 Activated : 162 Mean :0.3084 Mean :2
## CD4 Naive : 162 3rd Qu.:0.4622 3rd Qu.:3
## CD4 Central Memory: 162 Max. :1.0000 Max. :3
## (Other) :1296
## lp logp
## Min. :-7.788 Min. :-7.813
## 1st Qu.:-2.789 1st Qu.:-2.849
## Median :-0.819 Median :-1.184
## Mean :-1.144 Mean :-1.814
## 3rd Qu.:-0.152 3rd Qu.:-0.772
## Max. :11.513 Max. : 0.000
##
NAs again.m <- melt(TCELLS, id = c("Sample", "Center", "Population", "Method"), measure = "Proportion")
kable(cast(m, Method ~ Population), format = "html", table.attr = "id=\"tcell_balance\"")
## Aggregation requires fun.aggregate: length used as default
| Lymphocytes | CD3 | CD4 | CD4 Activated | CD4 Naive | CD4 Central Memory | CD4 Effector Memory | CD4 Effector | CD8 | CD8 Activated | CD8 Naive | CD8 Central Memory | CD8 Effector Memory | CD8 Effector | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Manual | 54 | 54 | 54 | 54 | 54 | 54 | 54 | 54 | 54 | 54 | 54 | 54 | 54 | 54 |
| flowDensity | 54 | 54 | 54 | 54 | 54 | 54 | 54 | 54 | 54 | 54 | 54 | 54 | 54 | 54 |
| OpenCyto | 54 | 54 | 54 | 54 | 54 | 54 | 54 | 54 | 54 | 54 | 54 | 54 | 54 | 54 |
For some reason there are more observations from flowDensity and Manual gating than OpenCyto.
kable(cast(m, Method ~ Center), format = "html", table.attr = "id=\"tcell_centers\"")
## Aggregation requires fun.aggregate: length used as default
| Baylor | Miami | NHLBI | Stanford | UCLA | Yale | |
|---|---|---|---|---|---|---|
| Manual | 126 | 126 | 126 | 126 | 126 | 126 |
| flowDensity | 126 | 126 | 126 | 126 | 126 | 126 |
| OpenCyto | 126 | 126 | 126 | 126 | 126 | 126 |
The range of the data looks okay.
range(m$value)=[4.0431 × 10-4, 1]
TCELLS[, `:=`(Replicate, gl(nrow(.SD), 1)), list(Sample, Center, Population,
Method)]
## Sample Center File Population Proportion
## 1: 12828 Baylor 12828_1_T CELL.fcs Lymphocytes 0.23800
## 2: 12828 Baylor 12828_2_T CELL.fcs Lymphocytes 0.49100
## 3: 12828 Baylor 12828_3_T CELL.fcs Lymphocytes 0.48100
## 4: 12828 Miami lot 12828_A1_A01.fcs Lymphocytes 0.61500
## 5: 12828 Miami lot 12828_A2_A02.fcs Lymphocytes 0.55700
## ---
## 2264: 1349 UCLA TCELL 22013_1349_002.fcs CD8 Effector 0.16178
## 2265: 1349 UCLA TCELL 22013_1349_003.fcs CD8 Effector 0.15292
## 2266: 1369 UCLA TCELL 22013_1369_001.fcs CD8 Effector 0.05122
## 2267: 1369 UCLA TCELL 22013_1369_002.fcs CD8 Effector 0.05292
## 2268: 1369 UCLA TCELL 22013_1369_003.fcs CD8 Effector 0.07474
## Method Replicate lp logp
## 1: Manual 1 -1.16365 -1.4355
## 2: Manual 2 -0.03600 -0.7113
## 3: Manual 3 -0.07604 -0.7319
## 4: Manual 1 0.46837 -0.4861
## 5: Manual 2 0.22899 -0.5852
## ---
## 2264: OpenCyto 2 -1.64499 -1.8215
## 2265: OpenCyto 3 -1.71186 -1.8779
## 2266: OpenCyto 1 -2.91892 -2.9717
## 2267: OpenCyto 2 -2.88437 -2.9389
## 2268: OpenCyto 3 -2.51596 -2.5938
TCELLS <- TCELLS[Center != "CIMR"] #drop CIMR
df <- cast(TCELLS, Sample + Center + Method ~ Population + Replicate, value = "Proportion")
TCELLS <- TCELLS[, `:=`(lp, logit(Proportion, adjust = 1e-05))]
TCELLS <- TCELLS[, `:=`(logp, log(Proportion))]
pops <- levels(TCELLS$Population)
setkey(TCELLS, Population)
ggplot(TCELLS[pops[c(3, 5, 12)]]) + geom_boxplot(aes(y = Proportion, x = Center,
fill = Method)) + facet_grid(Population ~ Sample, scales = "free") + theme(axis.text.x = element_text(angle = 45,
hjust = 1)) + ggtitle("Raw T-cell data")
We fit the mixed model to the T-cell panel.
mer <- lmer(lp ~ Population * Method + (1 | Center/Population) + (1 | Sample/Population),
TCELLS[Population != "Lymphocytes"], REML = FALSE, verbose = FALSE)
A couple of the CD8 populations exhibit some bias, but again, they are more consistent across subjects and centers than the manual gating.
####################################### #######################################
## Sample Center File
## 1349 :504 Baylor :216 1228-1_E1_E01.fcs : 24
## 1369 :504 CIMR :216 1228-2_E2_E02.fcs : 24
## 12828:504 Miami :216 1228-3_E3_E03.fcs : 24
## NHLBI :216 12828_1_E1_E01.fcs : 24
## Stanford:216 12828_1_TH1,2f,2,2f,17.fcs: 24
## UCLA :216 12828_1_Thelper_E01.fcs : 24
## Yale :216 (Other) :1368
## Population Proportion Method Replicate
## CD4 Activated:189 Min. :0.0001 Manual :504 Min. :1
## CD4 Th1 :189 1st Qu.:0.0181 flowDensity:504 1st Qu.:1
## CD4 Th2 :189 Median :0.0478 OpenCyto :504 Median :2
## CD4 Th17 :189 Mean :0.2511 Mean :2
## CD8 Activated:189 3rd Qu.:0.4678 3rd Qu.:3
## CD8 Th1 :189 Max. :0.9984 Max. :3
## (Other) :378
## lp logp
## Min. :-9.044 Min. :-9.132
## 1st Qu.:-3.991 1st Qu.:-4.010
## Median :-2.992 Median :-3.041
## Mean :-2.125 Mean :-2.666
## 3rd Qu.:-0.129 3rd Qu.:-0.760
## Max. : 6.449 Max. :-0.002
##
NAs again.m <- melt(THELPER, id = c("Sample", "Center", "Population", "Method"), measure = "Proportion")
kable(cast(m, Method ~ Population), format = "html", table.attr = "id=\"thelper_balance\"")
## Aggregation requires fun.aggregate: length used as default
| CD4 Activated | CD4 Th1 | CD4 Th2 | CD4 Th17 | CD8 Activated | CD8 Th1 | CD8 Th2 | CD8 Th17 | |
|---|---|---|---|---|---|---|---|---|
| Manual | 63 | 63 | 63 | 63 | 63 | 63 | 63 | 63 |
| flowDensity | 63 | 63 | 63 | 63 | 63 | 63 | 63 | 63 |
| OpenCyto | 63 | 63 | 63 | 63 | 63 | 63 | 63 | 63 |
kable(cast(m, Method ~ Center), format = "html", table.attr = "id=\"thelper_centers\"")
## Aggregation requires fun.aggregate: length used as default
| Baylor | CIMR | Miami | NHLBI | Stanford | UCLA | Yale | |
|---|---|---|---|---|---|---|---|
| Manual | 72 | 72 | 72 | 72 | 72 | 72 | 72 |
| flowDensity | 72 | 72 | 72 | 72 | 72 | 72 | 72 |
| OpenCyto | 72 | 72 | 72 | 72 | 72 | 72 | 72 |
kable(cast(m, Method ~ Population), format = "html", table.attr = "id=\"thelper_populations\"")
## Aggregation requires fun.aggregate: length used as default
| CD4 Activated | CD4 Th1 | CD4 Th2 | CD4 Th17 | CD8 Activated | CD8 Th1 | CD8 Th2 | CD8 Th17 | |
|---|---|---|---|---|---|---|---|---|
| Manual | 63 | 63 | 63 | 63 | 63 | 63 | 63 | 63 |
| flowDensity | 63 | 63 | 63 | 63 | 63 | 63 | 63 | 63 |
| OpenCyto | 63 | 63 | 63 | 63 | 63 | 63 | 63 | 63 |
Things look balanced, and we have removed the Lymphocytes, CD8, CD4, and CD3 cells since they are not terminal populations.
The range of the data looks okay.
range(m$value)=[1.0811 × 10-4, 0.9984]
THELPER[, `:=`(Replicate, gl(nrow(.SD), 1)), list(Sample, Center, Population,
Method)]
## Sample Center File Population Proportion
## 1: 12828 Baylor 12828_1_TH1,2f,2,2f,17.fcs CD4 Activated 0.0275000
## 2: 12828 Baylor 12828_2_TH1,2f,2,2f,17.fcs CD4 Activated 0.0317000
## 3: 12828 Baylor 12828_3_TH1,2f,2,2f,17.fcs CD4 Activated 0.0309000
## 4: 12828 CIMR TH1_TH2_TH17_12828_P1.fcs CD4 Activated 0.0150000
## 5: 12828 CIMR TH1_TH2_TH17_12828_001_P1.fcs CD4 Activated 0.0142000
## ---
## 1508: 1349 CIMR TH1_TH2_TH17_1349_002_P1.fcs CD8 Th17 0.0003786
## 1509: 1349 CIMR TH1_TH2_TH17_1349_P1.fcs CD8 Th17 0.0003435
## 1510: 1369 CIMR TH1_TH2_TH17_1369_001_P1.fcs CD8 Th17 0.0046840
## 1511: 1369 CIMR TH1_TH2_TH17_1369_002_P1.fcs CD8 Th17 0.0050342
## 1512: 1369 CIMR TH1_TH2_TH17_1369_P1.fcs CD8 Th17 0.0025063
## Method Replicate lp logp
## 1: Manual 1 -3.565 -3.594
## 2: Manual 2 -3.419 -3.451
## 3: Manual 3 -3.445 -3.477
## 4: Manual 1 -4.184 -4.200
## 5: Manual 2 -4.240 -4.255
## ---
## 1508: OpenCyto 2 -7.853 -7.879
## 1509: OpenCyto 3 -7.947 -7.976
## 1510: OpenCyto 1 -5.357 -5.364
## 1511: OpenCyto 2 -5.284 -5.292
## 1512: OpenCyto 3 -5.982 -5.989
df <- cast(THELPER, Sample + Center + Method ~ Population + Replicate, value = "Proportion")
THELPER <- THELPER[, `:=`(lp, logit(Proportion, adjust = 1e-05))]
THELPER <- THELPER[, `:=`(logp, log(Proportion))]
pops <- levels((THELPER$Population))
setkey(THELPER, Population)
ggplot(THELPER[pops[c(10, 11, 12)]]) + geom_boxplot(aes(y = Proportion, x = Center,
fill = Method)) + facet_grid(Population ~ Sample, scales = "free") + theme(axis.text.x = element_text(angle = 45,
hjust = 1)) + ggtitle("Raw T-helper data")
We fit the mixed model to the T-helper panel.
mer <- lmer(lp ~ Population * Method + (1 | Center/Population) + (1 | Sample/Population),
THELPER[Population != "Lymphocytes"], REML = FALSE, verbose = FALSE)
The T-helper panel seems to have failed, as we have a lot of bias in the Th1 and Th2 populations.
####################################### #######################################
## Sample Center File
## 1349 :441 Baylor :189 1228-1_D1_D01.fcs : 21
## 1369 :441 CIMR :189 1228-2_D2_D02.fcs : 21
## 12828:441 Miami :189 1228-3_D3_D03.fcs : 21
## NHLBI :189 12828_1_D1_D01.fcs : 21
## Stanford:189 12828_1_DC,2f,MONO,2f,NK.fcs: 21
## UCLA :189 12828_1_DcMonNk_D01.fcs : 21
## Yale :189 (Other) :1197
## Population Proportion Method Replicate
## CD14+CD16+ :189 Min. :0.0009 Manual :441 Min. :1
## CD14-Lineage-:189 1st Qu.:0.0958 flowDensity:441 1st Qu.:1
## CD16+CD56+ :189 Median :0.2122 OpenCyto :441 Median :2
## CD16+CD56- :189 Mean :0.2844 Mean :2
## HLADR+ :189 3rd Qu.:0.4610 3rd Qu.:3
## CD11c-CD123+ :189 Max. :0.8980 Max. :3
## CD11c+CD123- :189
## lp logp
## Min. :-6.958 Min. :-6.970
## 1st Qu.:-2.245 1st Qu.:-2.346
## Median :-1.312 Median :-1.550
## Mean :-1.399 Mean :-1.810
## 3rd Qu.:-0.156 3rd Qu.:-0.774
## Max. : 2.176 Max. :-0.108
##
NAs again.m <- melt(DC_MONO, id = c("Sample", "Center", "Population", "Method"), measure = "Proportion")
kable(cast(m, Method ~ Population), format = "html", table.attr = "id=\"dcmono_balance\"")
## Aggregation requires fun.aggregate: length used as default
| CD14+CD16+ | CD14-Lineage- | CD16+CD56+ | CD16+CD56- | HLADR+ | CD11c-CD123+ | CD11c+CD123- | |
|---|---|---|---|---|---|---|---|
| Manual | 63 | 63 | 63 | 63 | 63 | 63 | 63 |
| flowDensity | 63 | 63 | 63 | 63 | 63 | 63 | 63 |
| OpenCyto | 63 | 63 | 63 | 63 | 63 | 63 | 63 |
We are missing some populations.
kable(cast(m, Method ~ Center), format = "html", table.attr = "id=\"dcmono_centers\"")
## Aggregation requires fun.aggregate: length used as default
| Baylor | CIMR | Miami | NHLBI | Stanford | UCLA | Yale | |
|---|---|---|---|---|---|---|---|
| Manual | 63 | 63 | 63 | 63 | 63 | 63 | 63 |
| flowDensity | 63 | 63 | 63 | 63 | 63 | 63 | 63 |
| OpenCyto | 63 | 63 | 63 | 63 | 63 | 63 | 63 |
DC_MONO <- DC_MONO[!Population %in% c("Monocytes", "CD14-Lineage+", "CD14+CD16-")]
DC_MONO <- DC_MONO[, `:=`(Population, factor(Population))]
m <- melt(DC_MONO, id = c("Sample", "Center", "Population", "Method"), measure = "Proportion")
The range of the data looks okay.
range(m$value)=[9.4 × 10-4, 0.898]
DC_MONO[, `:=`(Replicate, gl(nrow(.SD), 1)), list(Sample, Center, Population,
Method)]
## Sample Center File Population Proportion
## 1: 12828 Baylor 12828_1_DC,2f,MONO,2f,NK.fcs CD14+CD16+ 0.01370
## 2: 12828 Baylor 12828_2_DC,2f,MONO,2f,NK.fcs CD14+CD16+ 0.01140
## 3: 12828 Baylor 12828_3_DC,2f,MONO,2f,NK.fcs CD14+CD16+ 0.01180
## 4: 12828 CIMR DC_MONO_NK_12828001.fcs CD14+CD16+ 0.02380
## 5: 12828 CIMR DC_MONO_NK_12828001_001.fcs CD14+CD16+ 0.02530
## ---
## 1319: 1349 Miami lot 1349_D5_D05.fcs CD11c+CD123- 0.16847
## 1320: 1349 Miami lot 1349_D6_D06.fcs CD11c+CD123- 0.22340
## 1321: 1369 Miami lot 1369_D7_D07.fcs CD11c+CD123- 0.11797
## 1322: 1369 Miami lot 1369_D8_D08.fcs CD11c+CD123- 0.11240
## 1323: 1369 Miami lot 1369_D9_D09.fcs CD11c+CD123- 0.08522
## Method Replicate lp logp
## 1: Manual 1 -4.276 -4.290
## 2: Manual 2 -4.462 -4.474
## 3: Manual 3 -4.427 -4.440
## 4: Manual 1 -3.714 -3.738
## 5: Manual 2 -3.651 -3.677
## ---
## 1319: OpenCyto 2 -1.596 -1.781
## 1320: OpenCyto 3 -1.246 -1.499
## 1321: OpenCyto 1 -2.012 -2.137
## 1322: OpenCyto 2 -2.066 -2.186
## 1323: OpenCyto 3 -2.373 -2.463
df <- cast(DC_MONO, Sample + Center + Method ~ Population + Replicate, value = "Proportion")
DC_MONO <- DC_MONO[, `:=`(lp, logit(Proportion, adjust = 1e-05))]
DC_MONO <- DC_MONO[, `:=`(logp, log(Proportion))]
pops <- levels((DC_MONO$Population))
setkey(DC_MONO, Population)
ggplot(DC_MONO[pops[c(1:3)]]) + geom_boxplot(aes(y = Proportion, x = Center,
fill = Method)) + facet_grid(Population ~ Sample, scales = "free") + theme(axis.text.x = element_text(angle = 45,
hjust = 1)) + ggtitle("Raw DC/Mono/NK data")
We fit the mixed model to the DC/Mono/NK panel.
mer <- lmer(lp ~ Population * Method + (1 | Center/Population) + (1 | Sample/Population),
DC_MONO[Population != "Lymphocytes"], REML = FALSE, verbose = FALSE)
####################################### #######################################
## Sample Center File Population
## 1349 :210 Baylor :90 1228-1_B1_B01.fcs : 10 Lo127Hi25 :126
## 1369 :210 CIMR :90 1228-2_B2_B02.fcs : 10 Naive :126
## 12828:210 Miami :90 1228-3_B3_B03.fcs : 10 Memory :126
## NHLBI :90 12828_1_B1_B01.fcs : 10 Total Treg:126
## Stanford:90 12828_1_T REG.fcs : 10 Activated :126
## UCLA :90 12828_1_Treg_B01.fcs: 10
## Yale :90 (Other) :570
## Proportion Method Replicate lp
## Min. :0.00034 Manual :315 Min. :1 Min. :-7.95
## 1st Qu.:0.00845 OpenCyto:315 1st Qu.:1 1st Qu.:-4.76
## Median :0.03289 Median :2 Median :-3.38
## Mean :0.04084 Mean :2 Mean :-3.79
## 3rd Qu.:0.06271 3rd Qu.:3 3rd Qu.:-2.70
## Max. :0.17197 Max. :3 Max. :-1.57
##
## logp
## Min. :-7.98
## 1st Qu.:-4.77
## Median :-3.42
## Mean :-3.83
## 3rd Qu.:-2.77
## Max. :-1.76
##
NAs again.m <- melt(TREG, id = c("Sample", "Center", "Population", "Method"), measure = "Proportion")
kable(cast(m, Method ~ Population), format = "html", table.attr = "id=\"treg_balance\"")
## Aggregation requires fun.aggregate: length used as default
| Lo127Hi25 | Naive | Memory | Total Treg | Activated | |
|---|---|---|---|---|---|
| Manual | 63 | 63 | 63 | 63 | 63 |
| OpenCyto | 63 | 63 | 63 | 63 | 63 |
Populations are balanced.
kable(cast(m, Method ~ Center), format = "html", table.attr = "id=\"treg_centers\"")
## Aggregation requires fun.aggregate: length used as default
| Baylor | CIMR | Miami | NHLBI | Stanford | UCLA | Yale | |
|---|---|---|---|---|---|---|---|
| Manual | 45 | 45 | 45 | 45 | 45 | 45 | 45 |
| OpenCyto | 45 | 45 | 45 | 45 | 45 | 45 | 45 |
Centers are also balanced
We drop Lymphocytes, CD3 and CD4.
TREG <- TREG[!Population %in% c("Lymphocytes", "CD3", "CD4")]
TREG <- TREG[, `:=`(Population, factor(Population))]
m <- melt(TREG, id = c("Sample", "Center", "Population", "Method"), measure = "Proportion")
The range of the data looks okay.
range(m$value)=[3.41 × 10-4, 0.172]
TREG[, `:=`(Replicate, gl(nrow(.SD), 1)), list(Sample, Center, Population, Method)]
## Sample Center File Population Proportion Method
## 1: 12828 Baylor 12828_1_T REG.fcs Lo127Hi25 0.131000 Manual
## 2: 12828 Baylor 12828_2_T REG.fcs Lo127Hi25 0.127000 Manual
## 3: 12828 Baylor 12828_3_T REG.fcs Lo127Hi25 0.130000 Manual
## 4: 12828 CIMR TREG_12828_P1.fcs Lo127Hi25 0.113000 Manual
## 5: 12828 CIMR TREG_12828_001_P1.fcs Lo127Hi25 0.109000 Manual
## ---
## 626: 1349 CIMR TREG_1349_002_P1.fcs Activated 0.008380 OpenCyto
## 627: 1349 CIMR TREG_1349_P1.fcs Activated 0.008672 OpenCyto
## 628: 1369 CIMR TREG_1369_001_P1.fcs Activated 0.000958 OpenCyto
## 629: 1369 CIMR TREG_1369_002_P1.fcs Activated 0.001086 OpenCyto
## 630: 1369 CIMR TREG_1369_P1.fcs Activated 0.000618 OpenCyto
## Replicate lp logp
## 1: 1 -1.892 -2.033
## 2: 2 -1.928 -2.064
## 3: 3 -1.901 -2.040
## 4: 1 -2.060 -2.180
## 5: 2 -2.101 -2.216
## ---
## 626: 2 -4.772 -4.782
## 627: 3 -4.738 -4.748
## 628: 1 -6.939 -6.951
## 629: 2 -6.815 -6.825
## 630: 3 -7.372 -7.389
df <- cast(TREG, Sample + Center + Method ~ Population + Replicate, value = "Proportion")
TREG <- TREG[, `:=`(lp, logit(Proportion, adjust = 1e-05))]
TREG <- TREG[, `:=`(logp, log(Proportion))]
pops <- levels((TREG$Population))
setkey(TREG, Population)
ggplot(TREG[pops[c(1:3, 5)]]) + geom_boxplot(aes(y = Proportion, x = Center,
fill = Method)) + facet_grid(Population ~ Sample, scales = "free") + theme(axis.text.x = element_text(angle = 45,
hjust = 1)) + ggtitle("Raw DC/Mono/NK data")
We fit the mixed model to the T-reg panel.
mer <- lmer(lp ~ Population * Method + (1 | Center/Population) + (1 | Sample/Population),
TREG[Population != "Lymphocytes"], REML = FALSE, verbose = FALSE)
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale. Scale for 'y' is already present. Adding
## another scale for 'y', which will replace the existing scale.